from llama_index.core import SimpleDirectoryReader
from llama_index.core.node_parser import SentenceSplitter

# Load everything SimpleDirectoryReader finds under ./data. The documents are
# read once and reused for every chunk size tried below.
documents = SimpleDirectoryReader("./data").load_data()

# Candidate values passed as SentenceSplitter(chunk_size=...), one run each.
chunk_sizes = [256, 512, 1024]

# Upper bound (in characters) on how much of the first chunk is echoed.
PREVIEW_CHARS = 200

# Visual divider printed three times before each run.
SEPARATOR = "---------------------------------------------------->"

for chunk_size in chunk_sizes:
    print("\n".join([SEPARATOR] * 3))
    print(f"\n=== 测试 chunk_size: {chunk_size} ===")

    # A fresh splitter per run; only chunk_size varies, overlap stays at 20.
    node_parser = SentenceSplitter(
        chunk_size=chunk_size,
        chunk_overlap=20,
    )

    nodes = node_parser.get_nodes_from_documents(documents)
    print(f"生成 {len(nodes)} 个文本块")

    # Inspect the first chunk; skipped when the splitter produced no nodes.
    if nodes:
        first_chunk = nodes[0].text
        # NOTE: len() here counts characters of the chunk text, which is not
        # necessarily the same unit as chunk_size — confirm against the
        # SentenceSplitter documentation before comparing the two numbers.
        print(f"第一个块长度: {len(first_chunk)}")

        # Print a bounded preview rather than the whole chunk, and append the
        # ellipsis only when text was actually cut off.
        preview = first_chunk[:PREVIEW_CHARS]
        ellipsis = "..." if len(first_chunk) > PREVIEW_CHARS else ""
        print(f"内容预览: {preview}{ellipsis}")
